# Construct Societal Inequality Variables

##
## GINI (UNU WIID dataset)
## Better than the World Bank data, but still MANY MISSING values -> too much data loss?
##
# Build a country-year Gini series from the UNU-WIID spreadsheet and
# left-join it onto Master as `unu_gini`.
wiid <- read.xls("Daten/UNU WIID GINI/WIID3b.xls", sheet = 1)

# Translate WIID country names into Correlates of War numeric codes ("cown").
# warn = TRUE makes countrycode report names it could not match.
wiid$actorid <- countrycode(wiid$Country, "country.name", "cown", warn = TRUE)

# Keep only rows with an observed Gini in 1989-2009, and only the columns
# needed for aggregation.
wiid <- subset(
  wiid,
  !is.na(Gini) & Year > 1988 & Year < 2010,
  select = c(actorid, Year, Gini)
)

# Collapse to one row per country-year, keeping the largest Gini when there
# are several estimates. aggregate() omits rows whose grouping value is NA,
# so countries without a COW code drop out here. Naming the `by` list yields
# the join keys `actorid` and `year` directly; the value column is called `x`.
wiid <- aggregate(
  x = wiid$Gini,
  by = list(actorid = wiid$actorid, year = wiid$Year),
  FUN = max,
  na.rm = TRUE
)
wiid <- rename(wiid, c("x" = "unu_gini"))

# Left join: every Master row is kept; unmatched rows get NA for unu_gini.
Master <- join(Master, wiid, type = "left", by = c("actorid", "year"),
               match = "all")


##
## (logged) GDP per capita (constant 2005 US$) &
## GINI Coefficient (World Bank Data)
##

# World Bank WDI indicators to download.
# NOTE(review): the section header calls the GDP series "constant 2005 US$";
# the World Bank has rebased this indicator over time, so the live API may
# return a different base year -- confirm before comparing with older runs.
wdilist <- c(
  "SI.POV.GINI",    # Gini Index
  "NY.GDP.PCAP.KD"  # GDP per capita (constant US$)
)

# Extract latest version of desired variables from WDI (1989-2009, all
# countries, without the extra metadata columns).
wdi <- WDI(country = "all", indicator = wdilist,
           start = 1989, end = 2009, extra = FALSE)

# Translate ISO2 codes into Correlates of War numeric codes ("cown").
# warn = TRUE makes countrycode report codes it could not match.
wdi$actorid <- countrycode(wdi$iso2c, "iso2c", "cown", warn = TRUE)

# Manual COW codes, keyed by the WDI country name. These overwrite whatever
# countrycode returned for the listed countries.
cow_overrides <- c(
  "Kosovo"       = 347, # Kosovo
  "Serbia"       = 345, # Serbia (Yugoslavia)
  "South Sudan"  = 626, # South Sudan (to be sure it does not match Sudan)
  "Yemen, Rep."  = 678, # used in SVAC for Yemen (in CoW: Yemen Arab Republic!)
  "South Africa" = 560, # South Africa
  "Zimbabwe"     = 552  # Zimbabwe
)
for (country_name in names(cow_overrides)) {
  wdi$actorid[wdi$country == country_name] <- cow_overrides[[country_name]]
}

# Natural log of GDP per capita; NA input stays NA.
wdi$wdi.GDPcap.log <- log(wdi$NY.GDP.PCAP.KD)
wdi <- rename(wdi, c("SI.POV.GINI" = "wdi.gini"))

# Drop rows without a COW code and keep only the join keys plus the two
# constructed variables.
wdi <- subset(
  wdi,
  !is.na(actorid),
  select = c(actorid, year, wdi.gini, wdi.GDPcap.log)
)

# Left join: every Master row is kept; unmatched rows get NA.
Master <- join(Master, wdi, type = "left", by = c("actorid", "year"),
               match = "all")

# Remove intermediate objects (including the helpers introduced above) so
# they do not linger in the workspace.
rm(wiid, wdi, cow_overrides, country_name)

##
## (logged) GDP per capita (constant 2005 US$) - OBSOLETE
##

# load("Daten/World Bank WDI/GDPcap.RData")
# wb_wdi$wb_GDPcap_log <- log(wb_wdi$wb_GDPcap)
# wb_wdi$year <- as.numeric(as.character(wb_wdi$year))
# Master <- join(Master, wb_wdi, type="left", by=c("actorid", "year"), match="all")


## MANUAL DOWNLOAD AND EXTRACTION OF WORLD BANK DATA - OBSOLETE

# wb.gini <- read.csv(file = "Daten/World Bank GINI/gini.csv", header = T, sep = ",")
# wb.gini$actorid <- countrycode(wb.gini$Country.Code, "wb", "cown",warn = T) # add COW-IDs
# wb.gini <- subset(wb.gini, subset = !is.na(wb.gini$actorid)) # delete all rows w/o COW-IDs
# wb.gini <- subset(wb.gini, select = c("actorid", "X1989", "X1990", "X1991", "X1992", 
#                                                 "X1993", "X1994", "X1995", "X1996", "X1997", "X1998", 
#                                                 "X1999", "X2000", "X2001", "X2002", "X2003", "X2004", 
#                                                 "X2005", "X2006", "X2007", "X2008" ,"X2009"))
# 
# wb.gini <- rename(wb.gini, c("actorid"="actorid","X1989"="1989",
#                                        "X1990"="1990","X1991"="1991","X1992"="1992","X1993"="1993","X1994"="1994",
#                                        "X1995"="1995","X1996"="1996","X1997"="1997","X1998"="1998","X1999"="1999",
#                                        "X2000"="2000","X2001"="2001","X2002"="2002","X2003"="2003","X2004"="2004",
#                                        "X2005"="2005","X2006"="2006","X2007"="2007","X2008"="2008","X2009"="2009"))
# 
# wb.gini <- melt(wb.gini, id.vars=c("actorid")) # from wide to long format
# wb.gini <- rename(wb.gini, c("actorid"="actorid", "variable"="year", "value"="wb_gini"))
# wb.gini$year <- as.numeric(as.character(wb.gini$year))
# 
# Master <- join(Master, wb.gini, type="left", by=c("actorid", "year"), match="all")




##
## Import HUGE World Bank WDI dataset, process it, extract GDP/capita variable, and save a manageable dataset
## (one-time process)
##

# wb_wdi <- read.csv("Daten/World Bank WDI/WDI_Data.csv", sep = ",", header = T)
# wb_wdi <- subset(wb_wdi, Indicator.Name=="GDP per capita (constant 2005 US$)", 
#                  select = c(Country.Code, X1989, X1990, X1991, X1992, X1993, X1994,
#                            X1995, X1996, X1997, X1998, X1999, X2000, X2001, X2002, 
#                            X2003, X2004, X2005, X2006, X2007, X2008, X2009))
# wb_wdi <- rename(wb_wdi, replace = c("X1989"=1989, "X1990"=1990, "X1991"=1991, "X1992"=1992, "X1993"=1993, 
#                                       "X1994"=1994, "X1995"=1995, "X1996"=1996, "X1997"=1997, "X1998"=1998, 
#                                       "X1999"=1999, "X2000"=2000, "X2001"=2001, "X2002"=2002, "X2003"=2003, 
#                                       "X2004"=2004, "X2005"=2005, "X2006"=2006, "X2007"=2007, "X2008"=2008, 
#                                       "X2009"=2009))
# wb_wdi <- melt(data = wb_wdi, id.vars = c("Country.Code"))
# wb_wdi <- rename(wb_wdi, replace = c("Country.Code"="actorid", "variable"="year", "value"="wb_GDPcap"))
# wb_wdi$actorid <- countrycode(wb_wdi$actorid, "wb", "cown", warn = T)
# wb_wdi <- subset(wb_wdi, !is.na(wb_wdi$actorid))
# save(wb_wdi, file = "Daten/World Bank WDI/GDPcap.RData")
# rm(wb_wdi)
